package cn.xo68.boot.webgather.job.txtnovel;

import cn.xo68.boot.job.JobProvider;
import cn.xo68.boot.job.entity.QuartzJob;
import cn.xo68.boot.webgather.common.GatherStatusEnums;
import cn.xo68.boot.webgather.document.txtnovel.TxtNovelDoc;
import cn.xo68.boot.webgather.document.txtnovel.TxtNovelLinkGatherConfig;
import cn.xo68.boot.webgather.resolve.ContentResolve;
import cn.xo68.boot.webgather.resolve.ResolveFactory;
import cn.xo68.boot.webgather.service.txtnovel.TxtNovelGatherService;
import cn.xo68.core.date.DateTime;
import cn.xo68.core.util.JsonUtil;
import cn.xo68.core.util.StringTools;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.quartz.JobExecutionContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.stereotype.Component;

import java.net.URL;


/**
 * Job provider that collects text-novel links from one or more list pages.
 * <p>
 * The job parameters are a JSON document that is parsed into a
 * {@link TxtNovelLinkGatherConfig}. Example:
 * <pre>
 * {
 *   "resolveType": "OK_HTTP",
 *   "listPageGatherConfig": {
 *     "url": "http://m.555x.org/html/kehuanxiaoshuo6/list_74_${pageIndex}.html",
 *     "hasPage": true,
 *     "minPageIndex": 1,
 *     "maxPageIndex": 3,
 *     "listQuery": ".content .searchlist a"
 *   }
 * }
 * </pre>
 * When {@code hasPage} is true, the {@code ${pageIndex}} placeholder in {@code url} is replaced with
 * every index from {@code minPageIndex} to {@code maxPageIndex} (inclusive); otherwise {@code url}
 * is fetched once as-is. Gathering stops at the first page that fails.
 * <p>
 * NOTE(review): {@code @ConditionalOnMissingBean} on a component-scanned class depends on bean
 * definition ordering; confirm that the override mechanism behaves as intended.
 */
@ConditionalOnMissingBean(name = "txtNovelGatherJobProvider")
@Component("txtNovelGatherJobProvider")
public class TxtNovelLinkGatherJobProvider implements JobProvider {

    private static final Logger logger = LoggerFactory.getLogger(TxtNovelLinkGatherJobProvider.class);

    /** Placeholder in the list-page URL template that is replaced with the current page index. */
    private static final String PAGE_INDEX_PLACEHOLDER = "${pageIndex}";
    /** Pause between two consecutive list pages, in milliseconds. */
    private static final long PAGE_INTERVAL_MILLIS = 200L;
    /** Pause after a list page has been processed, in milliseconds. */
    private static final long LIST_PAGE_COOL_DOWN_MILLIS = 500L;

    @Autowired(required = false)
    private ResolveFactory resolveFactory;
    @Autowired
    private JsonUtil jsonUtil;
    @Autowired(required = false)
    private TxtNovelGatherService txtNovelGatherService;

    /**
     * Parses the job parameters and gathers novel links from the configured list page(s).
     * <p>
     * The method returns early (after logging an error) when the parameters are missing or
     * incomplete, when an optional collaborator was not injected, when a page fails to be
     * gathered, or when the executing thread is interrupted.
     *
     * @param context   the Quartz execution context (not used by this provider)
     * @param quartzJob the job definition whose parameters hold the JSON gather configuration
     */
    @Override
    public void execute(JobExecutionContext context, QuartzJob quartzJob) {
        TxtNovelLinkGatherConfig txtNovelLinkGatherConfig = jsonUtil.parse(quartzJob.getParameters(), TxtNovelLinkGatherConfig.class);
        logger.debug("txtNovelLinkGatherConfig:  {}", txtNovelLinkGatherConfig);
        // A missing list-page section or URL is as unusable as a missing configuration;
        // reject it here instead of failing later with a NullPointerException.
        if (txtNovelLinkGatherConfig == null
                || txtNovelLinkGatherConfig.getListPageGatherConfig() == null
                || txtNovelLinkGatherConfig.getListPageGatherConfig().getUrl() == null) {
            logger.error("参数不正确或为空");
            return;
        }
        // Both collaborators are injected with required = false, so they may legitimately be absent.
        if (resolveFactory == null || txtNovelGatherService == null) {
            logger.error("采集依赖未注入：resolveFactory 或 txtNovelGatherService 为空");
            return;
        }

        String urlTemplate = txtNovelLinkGatherConfig.getListPageGatherConfig().getUrl();

        if (!txtNovelLinkGatherConfig.getListPageGatherConfig().isHasPage()) {
            tryGatherListPage(quartzJob, txtNovelLinkGatherConfig, urlTemplate);
            return;
        }

        for (int pageIndex = txtNovelLinkGatherConfig.getListPageGatherConfig().getMinPageIndex();
             pageIndex <= txtNovelLinkGatherConfig.getListPageGatherConfig().getMaxPageIndex();
             pageIndex++) {
            String pageUrl = urlTemplate.replace(PAGE_INDEX_PLACEHOLDER, String.valueOf(pageIndex));
            logger.info("列表地址：{}", pageUrl);
            if (!tryGatherListPage(quartzJob, txtNovelLinkGatherConfig, pageUrl)) {
                // Stop at the first failing page, as the remaining pages are not attempted.
                return;
            }

            try {
                Thread.sleep(PAGE_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                logger.error("采集列表时休眠发生异常", e);
                // Restore the interrupt status and stop so that a shutdown request is honoured.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Gathers one list page and converts any failure into a logged error.
     *
     * @param quartzJob                the job being executed
     * @param txtNovelLinkGatherConfig the parsed gather configuration
     * @param listUrlStr               the URL of the list page to gather
     * @return {@code true} if the page was gathered without an exception, {@code false} otherwise
     */
    private boolean tryGatherListPage(QuartzJob quartzJob, TxtNovelLinkGatherConfig txtNovelLinkGatherConfig, String listUrlStr) {
        try {
            gatherListPage(quartzJob, txtNovelLinkGatherConfig, listUrlStr);
            return true;
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the scheduler thread instead of silently consuming it.
            Thread.currentThread().interrupt();
            logger.error("采集[{}]异常", listUrlStr, e);
            return false;
        } catch (Throwable e) {
            logger.error("采集[{}]异常", listUrlStr, e);
            return false;
        }
    }

    /**
     * Resolves a single list page, selects the link elements with the configured query and stores
     * every link that the gather service does not report as existing yet, marked as waiting to be
     * gathered. Sleeps briefly afterwards.
     *
     * @param quartzJob                the job being executed (currently unused)
     * @param txtNovelLinkGatherConfig the parsed gather configuration
     * @param listUrlStr               the URL of the list page to resolve
     * @throws Throwable if resolving the page, querying it or storing a link fails, or if the
     *                   thread is interrupted during the trailing pause
     */
    private void gatherListPage(QuartzJob quartzJob, TxtNovelLinkGatherConfig txtNovelLinkGatherConfig, String listUrlStr) throws Throwable {
        ContentResolve contentResolveDefault = resolveFactory.getContentResolve(txtNovelLinkGatherConfig.getResolveType(), listUrlStr);

        Elements eles = contentResolveDefault.listElements(txtNovelLinkGatherConfig.getListPageGatherConfig().getListQuery());
        for (Element ele : eles) {
            logger.debug("链接文本：{}，链接地址：{}", ele.text(), ele.attr("href"));

            String novelUrl = ele.attr("href");
            novelUrl = novelUrl == null ? "" : novelUrl.trim();
            if (novelUrl.isEmpty()) {
                // jsoup yields an empty string for a missing attribute; an element without a
                // target cannot be gathered later, so it is not stored.
                continue;
            }
            if (txtNovelGatherService.exist(novelUrl)) {
                continue;
            }

            TxtNovelDoc txtNovelDoc = new TxtNovelDoc();
            txtNovelDoc.setTxtNovelLinkGatherConfig(txtNovelLinkGatherConfig);
            txtNovelDoc.setGatherStatus(GatherStatusEnums.WAITGATHING);
            txtNovelDoc.setNovelName(ele.text());
            txtNovelDoc.setNovelUrl(novelUrl);
            txtNovelDoc.setLinkGatherTime(DateTime.Now().getDate());
            txtNovelGatherService.insert(txtNovelDoc);
        }

        Thread.sleep(LIST_PAGE_COOL_DOWN_MILLIS);
    }
}
